{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "39329df3-1f99-4b11-9405-5969d52368a7",
   "metadata": {},
   "source": [
    "# Decision Tree & Optuna Example"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7d5f0ab-33cd-40f2-82e7-fb2747f04f89",
   "metadata": {},
   "source": [
    "Example showing how to use the Optuna library (https://optuna.readthedocs.io/en/stable/) for Bayesian hyperparameter optimization via the Tree-structured Parzen Estimator (TPE) algorithm."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7f61a90e-a119-4bd0-af21-38604c5b4eec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scikit-learn: 1.0\n",
      "optuna      : 2.10.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Report the installed versions of scikit-learn and optuna for reproducibility\n",
    "%load_ext watermark\n",
    "%watermark -p scikit-learn,optuna"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f0489c2-dd9c-4e71-a78c-e01201762b37",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "271b17ff-5ea4-4161-8b7f-20ba8131d666",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train/Valid/Test sizes: 398 80 171\n"
     ]
    }
   ],
   "source": [
    "from sklearn import model_selection\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import datasets\n",
    "\n",
    "\n",
    "data = datasets.load_breast_cancer()\n",
    "X, y = data.data, data.target\n",
    "\n",
    "# Hold out 30% of the samples as the final test set, stratified by class label\n",
    "X_train, X_test, y_train, y_test = \\\n",
    "    train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)\n",
    "\n",
    "# Split 20% of the training portion off as a validation subset\n",
    "X_train_sub, X_valid, y_train_sub, y_valid = \\\n",
    "    train_test_split(X_train, y_train, test_size=0.2, random_state=1, stratify=y_train)\n",
    "\n",
    "# Report the reduced training subset: y_train still contains the validation rows,\n",
    "# so printing its size next to the validation size would double-count them\n",
    "print('Train/Valid/Test sizes:', y_train_sub.shape[0], y_valid.shape[0], y_test.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0affc454-9f07-48e6-bcee-e6253d968247",
   "metadata": {},
   "source": [
    "## Hyperparameter Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "53282fd6-1292-4b4d-a0b7-980707d61c3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hyperopt import Trials, STATUS_OK, tpe, hp, fmin\n",
    "import hyperopt.pyll.stochastic"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5435889f-3cd7-45cd-abb2-632e3b034194",
   "metadata": {},
   "source": [
    "Some random sampling examples (drawn with Hyperopt's `hp` distributions to illustrate log-uniform sampling):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7ca6f8f6-0c78-434a-8121-a83b5708e143",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2.083641453103286"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# loguniform draws exp(uniform(low, high)), so the sample lies in the range e^{low} to e^{high}\n",
    "hyperopt.pyll.stochastic.sample(hp.loguniform('test', 1e-5, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b2adc867-2d5a-44bd-8115-195ed53d6a7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# qloguniform: same as loguniform, but the draw is rounded to a multiple of q (here q = 0.1)\n",
    "hyperopt.pyll.stochastic.sample(hp.qloguniform('test', 1e-5, 1, 0.1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9a6bb270-d2a1-4179-a770-39bad5a8332c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "import numpy as np\n",
    "import optuna\n",
    "\n",
    "\n",
    "def optimization_objective(trial, X_train, y_train, cv=5):\n",
    "    \"\"\"Score one Optuna trial: mean stratified k-fold accuracy of a decision tree.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    trial : optuna.trial.Trial\n",
    "        Trial object used to sample the hyperparameters.\n",
    "    X_train : array supporting integer-array indexing (e.g. a NumPy array)\n",
    "        Training features.\n",
    "    y_train : array supporting integer-array indexing (e.g. a NumPy array)\n",
    "        Training labels.\n",
    "    cv : int, default=5\n",
    "        Number of stratified folds.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Validation accuracy averaged over the folds (the quantity to maximize).\n",
    "    \"\"\"\n",
    "    params = {\n",
    "        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),\n",
    "        # suggest_float replaces suggest_uniform, which is deprecated in newer Optuna releases\n",
    "        'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.5),\n",
    "        'max_depth': trial.suggest_categorical('max_depth', [6, 16, None]),\n",
    "    }\n",
    "\n",
    "    # Fixed seed: every trial is evaluated on the same folds, so trial scores are comparable\n",
    "    cv_iterator = StratifiedKFold(n_splits=cv, shuffle=True, random_state=123)\n",
    "\n",
    "    cv_scores = np.zeros(cv)\n",
    "    for fold, (fit_idx, val_idx) in enumerate(cv_iterator.split(X_train, y_train)):\n",
    "        model = DecisionTreeClassifier(**params, random_state=123)\n",
    "        model.fit(X_train[fit_idx], y_train[fit_idx])\n",
    "        # model.score returns the accuracy on the held-out fold\n",
    "        cv_scores[fold] = model.score(X_train[val_idx], y_train[val_idx])\n",
    "\n",
    "    return np.mean(cv_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "a51829c6-234f-401f-84ed-a005f71d0150",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m[I 2021-10-29 14:35:16,809]\u001b[0m A new study created in memory with name: DT Classifier\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,818]\u001b[0m Trial 0 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 8, 'min_impurity_decrease': 0.10833033823891053, 'max_depth': 6}. Best is trial 0 with value: 0.8969303797468354.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,831]\u001b[0m Trial 1 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.004444131707066423, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,837]\u001b[0m Trial 2 finished with value: 0.6281645569620252 and parameters: {'min_samples_split': 10, 'min_impurity_decrease': 0.3816433707073382, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,845]\u001b[0m Trial 3 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.26864271459806505, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,851]\u001b[0m Trial 4 finished with value: 0.6281645569620252 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.37434538267555795, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,859]\u001b[0m Trial 5 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.20496048074326506, 'max_depth': 16}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,867]\u001b[0m Trial 6 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 7, 'min_impurity_decrease': 0.06256629463040425, 'max_depth': 6}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,875]\u001b[0m Trial 7 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 5, 'min_impurity_decrease': 0.1765935104059193, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,881]\u001b[0m Trial 8 finished with value: 0.6281645569620252 and parameters: {'min_samples_split': 10, 'min_impurity_decrease': 0.4196563416203753, 'max_depth': 6}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,886]\u001b[0m Trial 9 finished with value: 0.6281645569620252 and parameters: {'min_samples_split': 7, 'min_impurity_decrease': 0.369164233342226, 'max_depth': 6}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,902]\u001b[0m Trial 10 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.004803822715951461, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,918]\u001b[0m Trial 11 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.003167227551493933, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,930]\u001b[0m Trial 12 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.023000973415986743, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,941]\u001b[0m Trial 13 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.12195191943454853, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,948]\u001b[0m Trial 14 finished with value: 0.6281645569620252 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.4973859788367999, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,959]\u001b[0m Trial 15 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.09370448691476807, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,971]\u001b[0m Trial 16 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 5, 'min_impurity_decrease': 0.27758354994140044, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,982]\u001b[0m Trial 17 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.1641810636479714, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:16,994]\u001b[0m Trial 18 finished with value: 0.9271202531645569 and parameters: {'min_samples_split': 6, 'min_impurity_decrease': 0.04880416933445276, 'max_depth': None}. Best is trial 1 with value: 0.9346518987341772.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,010]\u001b[0m Trial 19 finished with value: 0.9371835443037975 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.007392743931707742, 'max_depth': None}. Best is trial 19 with value: 0.9371835443037975.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,021]\u001b[0m Trial 20 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 5, 'min_impurity_decrease': 0.2205438268130882, 'max_depth': None}. Best is trial 19 with value: 0.9371835443037975.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,036]\u001b[0m Trial 21 finished with value: 0.9397151898734177 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.006252798709964645, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,045]\u001b[0m Trial 22 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.07287879252706704, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,057]\u001b[0m Trial 23 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.12881763215629966, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,067]\u001b[0m Trial 24 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.061430731137942486, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,081]\u001b[0m Trial 25 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.03358620333481756, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,097]\u001b[0m Trial 26 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.0009587382406495227, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,107]\u001b[0m Trial 27 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.14723700453120717, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,118]\u001b[0m Trial 28 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.08778905247461954, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,128]\u001b[0m Trial 29 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 9, 'min_impurity_decrease': 0.31448671579386195, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,140]\u001b[0m Trial 30 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 6, 'min_impurity_decrease': 0.10675422203765972, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,155]\u001b[0m Trial 31 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.003376035074489332, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,167]\u001b[0m Trial 32 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.034138313371878955, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,180]\u001b[0m Trial 33 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.03962449028427482, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,191]\u001b[0m Trial 34 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.07467450441564497, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,205]\u001b[0m Trial 35 finished with value: 0.9371518987341773 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.01518707108082638, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,218]\u001b[0m Trial 36 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.023632665680835302, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,230]\u001b[0m Trial 37 finished with value: 0.9120253164556962 and parameters: {'min_samples_split': 5, 'min_impurity_decrease': 0.05103872049659544, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,241]\u001b[0m Trial 38 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.19443123098049367, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,251]\u001b[0m Trial 39 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.10465220189935368, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,262]\u001b[0m Trial 40 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 7, 'min_impurity_decrease': 0.14535512921836374, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,277]\u001b[0m Trial 41 finished with value: 0.9371835443037975 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.0015529480975161282, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,294]\u001b[0m Trial 42 finished with value: 0.9346518987341772 and parameters: {'min_samples_split': 6, 'min_impurity_decrease': 0.0017576537374890712, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,307]\u001b[0m Trial 43 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 4, 'min_impurity_decrease': 0.025903009093224565, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,318]\u001b[0m Trial 44 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.06574136532243226, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,331]\u001b[0m Trial 45 finished with value: 0.9371202531645568 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.02222490586359776, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,343]\u001b[0m Trial 46 finished with value: 0.9321202531645569 and parameters: {'min_samples_split': 3, 'min_impurity_decrease': 0.026433959349831834, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,354]\u001b[0m Trial 47 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.08558375411408671, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,365]\u001b[0m Trial 48 finished with value: 0.8969303797468354 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.06055552476510423, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n",
      "\u001b[32m[I 2021-10-29 14:35:17,378]\u001b[0m Trial 49 finished with value: 0.9371202531645568 and parameters: {'min_samples_split': 2, 'min_impurity_decrease': 0.01940642542474149, 'max_depth': None}. Best is trial 21 with value: 0.9397151898734177.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Seed the TPE sampler explicitly so the sequence of suggested hyperparameters\n",
    "# is reproducible when the notebook is re-run from a fresh kernel\n",
    "study = optuna.create_study(\n",
    "    direction=\"maximize\",\n",
    "    study_name=\"DT Classifier\",\n",
    "    sampler=optuna.samplers.TPESampler(seed=123),\n",
    ")\n",
    "\n",
    "\n",
    "def func(trial):\n",
    "    \"\"\"Bind the training data to the objective; Optuna calls it with the trial only.\"\"\"\n",
    "    return optimization_objective(trial, X_train, y_train)\n",
    "\n",
    "\n",
    "study.optimize(func, n_trials=50);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2c26399d-ebfc-4b06-86d9-36e49711e908",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best CV accuracy: 0.93972\n",
      "Best params:\n",
      "\tmin_samples_split: 2\n",
      "\tmin_impurity_decrease: 0.006252798709964645\n",
      "\tmax_depth: None\n"
     ]
    }
   ],
   "source": [
    "# Summarize the best trial: its cross-validated score and the hyperparameters that produced it\n",
    "best_score = study.best_value\n",
    "best_params = study.best_params\n",
    "\n",
    "print(f\"Best CV accuracy: {best_score:.5f}\")\n",
    "print(\"Best params:\")\n",
    "\n",
    "for name in best_params:\n",
    "    print(f\"\\t{name}: {best_params[name]}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "83e99f85-9ce2-494e-99ea-20ab49dc0b15",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(min_impurity_decrease=0.006252798709964645,\n",
       "                       random_state=123)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Refit a single tree on the full training set using the best hyperparameters from the study\n",
    "final_params = dict(study.best_params, random_state=123)\n",
    "model = DecisionTreeClassifier(**final_params).fit(X_train, y_train)\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "fbb610d8-4846-4e9f-a589-adacd0042603",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Accuracy: 0.99\n",
      "Test Accuracy: 0.94\n"
     ]
    }
   ],
   "source": [
    "# Compare accuracy on the data the model was fit on against the held-out test set\n",
    "train_acc = model.score(X_train, y_train)\n",
    "test_acc = model.score(X_test, y_test)\n",
    "\n",
    "print(f\"Training Accuracy: {train_acc:0.2f}\")\n",
    "print(f\"Test Accuracy: {test_acc:0.2f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
